get_episode_wo_CI_function_four_item_afi.R <- function(data = vdem,
                        start_incl = 0.01,
                        cum_incl = 0.1,
                        year_turn = 0.03,    
                        cum_turn = 0.1, 
                        tolerance = 4)
{
  
  if(year_turn == 0)
    print("You set year_turn = 0. Did you mean to do this? Doing so means an episode ends when it experiences a year of no annual change on the EDI. Perhaps, instead, you meant to set its value equal to cum_turn. See p.3 of the ERT codebook.")
  
  
  ### DATA CLEANING AND PREP ###
  
  # selecting the variables we need to construct the episodes dataframe 
  full.df <- data %>%
    dplyr::select(country_name, country_id, country_text_id, year,
                  codingstart, codingend, matches("v2xca_academ", ignore.case = FALSE),
                  gapstart1, gapstart2, gapstart3, gapend1, gapend2, gapend3, v2x_regime) %>%
    dplyr::filter(year >= 1900) %>%
    dplyr::arrange(country_text_id, year) %>%
    dplyr::group_by(country_id) %>%
    # make codingstart 1900 or first year thereafter
    dplyr::mutate(codingstart2 = min(hablar::s(ifelse(!is.na(v2x_regime), year, NA))),
                  # tag original sample for later use
                  origsample = 1) %>%
    # we need to deal with gaps in v-dem coding
    # this balances the dataset
    plm::make.pconsecutive(balanced = TRUE, index = c("country_id", "year")) %>%
    dplyr::group_by(country_id) %>%
    # this fills missing variables we need that are constant within countries
    tidyr::fill(c(country_text_id, country_name, codingend, gapstart1, gapend1, gapstart2, gapend2,
                  gapstart3, gapend3)) %>%
    tidyr::fill(c(country_text_id, country_name,codingend, gapstart1, gapend1, gapstart2, gapend2,
                  gapstart3, gapend3), .direction = "up")  %>%
    # here we need to recode the gaps as only during the period prior to and during the gap (for our "uncertain" variables)
    dplyr::mutate(gapstart = ifelse(year <= gapend1, gapstart1, NA),
                  gapend = ifelse(year <= gapend1, gapend1, NA),
                  gapstart = ifelse(!is.na(gapend2) & year > gapend1 & year <= gapend2, gapstart2, gapstart),
                  gapend = ifelse(!is.na(gapend2) & year > gapend1 & year <= gapend2, gapend2, gapend),
                  gapstart = ifelse(!is.na(gapend3) & year > gapend2 & year <= gapend3, gapstart3, gapstart),
                  gapend = ifelse(!is.na(gapend3) & year > gapend2 & year <= gapend3, gapend3, gapend)) %>%
    
    ### CODING THE DECLINE EPISODES of Academic Freedom ###
    
    group_by(country_name) %>%
    mutate(VAR_1 = v2xca_academ_four - lag(v2xca_academ_four, 1), 
           start_auto = ifelse(VAR_1<=-start_incl, 1, NA), 
           min_1 = lead(VAR_1, 1), 
           min_2 = lead(VAR_1, 2), 
           min_3 = lead(VAR_1, 3), 
           min_4 = lead(VAR_1, 4), 
           auto_end = ifelse(min_1<=-start_incl | min_2<=-start_incl | min_3<=-start_incl | min_4<=-start_incl, 0, 1),
           auto_end2 = ifelse(VAR_1>=year_turn, 1, 0))  %>%
    
    # start_auto = helper for start episode of X
    # auto_end  = helper for end of an episode of X
    # auto_end_2  = helper 2 for end of an episode of X
    
    mutate(start_auto_1 = ifelse(lag(start_auto)==1, 1, 0), 
           start_auto_2 = ifelse(lag(start_auto, 2)==1, 1, 0), 
           start_auto_3 = ifelse(lag(start_auto, 3)==1, 1, 0), 
           start_auto_4 = ifelse(lag(start_auto, 4)==1, 1, 0), 
           start_auto_four = coalesce(start_auto_1, start_auto_2, start_auto_3, start_auto_4), 
           start_auto_four = ifelse(is.na(start_auto_four), 0, start_auto_four))  %>%
    dplyr::select(-c(start_auto_1, start_auto_2, start_auto_3, start_auto_4)) %>%
    dplyr::select(-c(min_1, min_2, min_3, min_4)) %>%
    fill(auto_end)
  
  
  # detect and save potential episodes with the help of the function 
  
  full.df <- as.data.frame(full.df)
  for (i in 1:nrow(full.df)) {
    full.df[i,][is.na(full.df[i,]) & full.df$auto_end[i-1] == 0 & full.df$start_auto_four[i] == 1 & full.df$start_auto[i-1] ==1 &
                  full.df$auto_end2[i] != 1] <- 1 
  }
  
  
  
  ## Episodes with a total magnitude of cum_incl decline  ##
  
  # first step: create auto_period ID
  full.df <- full.df %>%
    mutate(start_auto = ifelse(is.na(start_auto), 0, start_auto)) %>%
    group_by(country_name) %>%
    mutate(group_id = cumsum(start_auto != lag(start_auto, default = FALSE))) %>% # group_id for each autocra_period
    mutate(episode_id = str_c(country_name, group_id, sep = "_")) %>%
    
    # second step: create dummy in which decline was greater than Z
    
    group_by(episode_id) %>%
    mutate(last_VAR = last(v2xca_academ_four), 
           last_VAR_codelow = last(v2xca_academ_codelow_four), 
           last_VAR_codehigh = last(v2xca_academ_codehigh_four))%>%
    ungroup()%>%
    mutate(auto_dum = last_VAR - lag(last_VAR)) %>%
    dplyr::select(-c(group_id)) %>%
    group_by(episode_id) %>%
    mutate(decline_episode = ifelse(start_auto==1 & first(auto_dum) <= -cum_incl, 1, 0 )) %>% 
    rename(decline_sum = auto_dum, 
           decline_episode_id = episode_id) %>% # no overlap of confidence intervals before and after episode 
    group_by(country_text_id) %>%
   
    # then we clean out variables for non-manifest episodes
    dplyr::mutate(decline_episode_id = ifelse(decline_episode!=1, NA, decline_episode_id)) %>%
    group_by(decline_episode_id) %>%
    
    # generate the initial end year for the episode (note:  we have to filter out the stasis years that C++ gives us, but we will do this later):
    dplyr::mutate(decline_episode_end_year = ifelse(decline_episode==1, last(year), NA),
                  decline_episode_uncertain = ifelse(decline_episode==1 & codingend-decline_episode_end_year<tolerance, 1, 0),
                  decline_episode_uncertain = ifelse(decline_episode==1 & !is.na(gapstart) & (gapstart-1)-decline_episode_end_year<tolerance, 1, decline_episode_uncertain), 
                  # generate the start year for the potential episode as the first year after the pre-episode year
                  decline_episode_start_year = ifelse(decline_episode==1, first(year), NA)) %>%
    
    # here we code a dummy for the pre-episode year
    dplyr::mutate(v2xca_academ_pre_ep_year = ifelse(decline_episode==1, ifelse(year == dplyr::first(year), 1, 0), 0),
                  
                  # we create a unique identifier for episodes using the country_text_id, start, and end years
                  decline_episode_id = ifelse(decline_episode==1, paste(country_text_id, decline_episode_start_year, decline_episode_end_year, sep = "_"), NA)) %>%
    
    dplyr::ungroup() %>%
    # make sure the data is sorted properly
    dplyr::arrange(country_name, year) %>%
    # just to make sure we have a dataframe
    as.data.frame %>%
    
    # code termination type of decline episode
    
    # decline episodes end when one of five things happens:
    # 1. cumulative drop: the case experiences a gradual drop <= cum_turn over the tolerance period (or less)
    
    
    # first find the last positive change on EDI equal to the start_incl parameter
    # this will become the new end of episodes at some point, once we clean things up
    dplyr::group_by(decline_episode_id) %>%
    dplyr::mutate(last_ch_year = max(hablar::s(ifelse(v2xca_academ_four-dplyr::lag(v2xca_academ_four, n=1)>=start_incl, year, NA))),
                  # here we just replace with NA non-episode years
                  last_ch_year = ifelse(decline_episode==0, NA, last_ch_year))
  
  # first write a small function to deal with Inf
  my.max <- function(x) ifelse(!all(is.na(x)), max(x, na.rm=T), NA)
  
  # now we look for a gradual drop equal to cum_incr over the tolerance period
  cum_incr <- list()
  # here we loop over time equal to the tolerance, looking for the difference between the last_change_year and that year on the EDI
  for (i in 1:tolerance) {
    cum_incr[[i]] <- ifelse(full.df$year == full.df$last_ch_year & dplyr::lead(full.df$country_id, n=i)==full.df$country_id, dplyr::lead(full.df$v2xca_academ_four, n=i)-full.df$v2xca_academ_four, NA)
  }
  
  # then we rename the columns and generate a dataframe we can use for our existing data
  df <- do.call(cbind, lapply(cum_incr, data.frame, stringsAsFactors=FALSE))
  names <- paste0('cum', seq(1:tolerance))
  colnames(df) <- names
  cum_incr <- df %>%
    dplyr::mutate(cum_incr = ifelse(apply(df, 1, FUN = my.max) >= cum_turn, 1,NA)) %>%
    dplyr::select(cum_incr)
  
  # merge these new columns to our full.df
  full.df <- full.df %>%
    tibble::rownames_to_column('newid') %>%
    left_join(tibble::rownames_to_column(cum_incr, 'newid'), by = 'newid') %>%
    dplyr::select(-newid) %>%
    
    # now we can finally code our termination variable
    # first we group by episode
    dplyr::group_by(decline_episode_id) %>%
    dplyr::arrange(decline_episode_id, year) %>%
    # first, lets fill everything in for the episode
    dplyr::mutate(cum_incr = ifelse(decline_episode==1, max(hablar::s(cum_incr)), NA)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(decline_episode_id = ifelse(decline_episode==1, paste(country_text_id, decline_episode_start_year, decline_episode_end_year, sep = "_"), NA), 
                  decline_sum = ifelse(decline_episode!=1, NA, decline_sum), 
                  decline_episode_start_year = ifelse(decline_episode!=1, NA, decline_episode_start_year), 
                  decline_episode_end_year = ifelse(decline_episode!=1, NA, decline_episode_end_year)) %>%
    dplyr::group_by(country_text_id) %>%
    dplyr::arrange(country_id, year) %>%
    
    # code censored/ongoing episodes for survival analysis, new dummy variable
    dplyr::mutate(decline_episode_censored = ifelse(decline_episode==1 & decline_episode_uncertain == 1, 1, NA)) %>%
    
    ### CODING THE INCREASE EPISODES of Academic Freedom ###
    
    dplyr::group_by(country_text_id) %>%
    mutate(VAR_1 = v2xca_academ_four - lag(v2xca_academ_four, 1), # hier entsprechende Varibale einsetzen
           start_auto = ifelse(VAR_1>=start_incl, 1, NA), 
           min_1 = lead(VAR_1, 1), 
           min_2 = lead(VAR_1, 2), 
           min_3 = lead(VAR_1, 3), 
           min_4 = lead(VAR_1, 4), 
           auto_end = ifelse(min_1>=start_incl | min_2>=start_incl | min_3>=start_incl | min_4>=start_incl, 0, 1),
           auto_end2 = ifelse(VAR_1<=-year_turn, 1, 0)) %>%
    
    # start_auto = helper for start episode of X
    # auto_end  = helper for end of an episode of X
    # auto_end_2  = helper 2 for end of an episode of X
    
    mutate(start_auto_1 = ifelse(lag(start_auto)==1, 1, 0), 
           start_auto_2 = ifelse(lag(start_auto, 2)==1, 1, 0), 
           start_auto_3 = ifelse(lag(start_auto, 3)==1, 1, 0), 
           start_auto_4 = ifelse(lag(start_auto, 4)==1, 1, 0), 
           start_auto_four = coalesce(start_auto_1, start_auto_2, start_auto_3, start_auto_4), 
           start_auto_four = ifelse(is.na(start_auto_four), 0, start_auto_four))  %>%
    dplyr::select(-c(start_auto_1, start_auto_2, start_auto_3, start_auto_4)) %>%
    dplyr::select(-c(min_1, min_2, min_3, min_4)) %>%
    fill(auto_end)
  
  
  # detect and save potential episodes with the help of the function 
  
  
  full.df <- as.data.frame(full.df)
  for (i in 1:nrow(full.df)) {
    full.df[i,][is.na(full.df[i,]) & full.df$auto_end[i-1] == 0 & full.df$start_auto_four[i] == 1 & full.df$start_auto[i-1] ==1 &
                  full.df$auto_end2[i] != 1] <- 1 
  }
  
  
  ## Episodes with a total magnitude of cum_turn INCREASE ##
  
  # first step: create increase_period_id
  full.df <- full.df %>%
    mutate(start_auto = ifelse(is.na(start_auto), 0, start_auto)) %>%
    group_by(country_name) %>%
    mutate(group_id = cumsum(start_auto != lag(start_auto, default = FALSE))) %>% # group_id for each increase_episode
    mutate(episode_id = str_c(country_name, group_id, sep = "_")) %>%
    
    # second step: create dummy in which decline was greater than Z
    
    group_by(episode_id) %>%
    mutate(last_VAR = last(v2xca_academ_four), 
           last_VAR_codelow = last(v2xca_academ_codelow_four), 
           last_VAR_codehigh = last(v2xca_academ_codehigh_four))%>%
    ungroup()%>%
    mutate(auto_dum = last_VAR - lag(last_VAR)) %>%
    dplyr::select(-c(group_id)) %>%
    group_by(episode_id) %>%
    mutate(increase_episode = ifelse(start_auto==1 & first(auto_dum) >= cum_incl, 1, 0 )) %>% 
    rename(increase_sum = auto_dum, 
           increase_episode_id = episode_id) %>% # no overlap of confidence intervals before and after episode 
    group_by(country_text_id) %>%
    mutate(lag_v2xca_academ_four = lag(v2xca_academ_four, 1)) %>%

    # then we clean out variables for non-manifest episodes
    dplyr::mutate(increase_episode_id = ifelse(increase_episode!=1, NA, increase_episode_id)) %>%
    group_by(increase_episode_id) %>%
    
    # generate the initial end year for the episode (note:  we have to filter out the stasis years that C++ gives us, but we will do this later):
    dplyr::mutate(increase_episode_end_year = ifelse(increase_episode==1, last(year), NA),
                  increase_episode_uncertain = ifelse(increase_episode==1 & codingend-increase_episode_end_year<tolerance, 1, 0),
                  increase_episode_uncertain = ifelse(increase_episode==1 & !is.na(gapstart) & (gapstart-1)-increase_episode_end_year<tolerance, 1, increase_episode_uncertain), 
                  # generate the start year for the potential episode as the first year after the pre-episode year
                  increase_episode_start_year = ifelse(increase_episode==1, first(year), NA)) %>%
    
    # here we code a dummy for the pre-episode year
    dplyr::mutate(v2xca_academ_pre_ep_year_increase = ifelse(increase_episode==1, ifelse(year == dplyr::first(year), 1, 0), 0),
                  
                  # we create a unique identifier for episodes using the country_text_id, start, and end years
                  increase_episode_id = ifelse(increase_episode==1, paste(country_text_id, increase_episode_start_year, increase_episode_end_year, sep = "_"), NA)) %>%
    
    dplyr::ungroup() %>%
    # make sure the data is sorted properly
    dplyr::arrange(country_name, year) %>%
    # just to make sure we have a dataframe
    as.data.frame %>%
    
    # code termination type of decline episode
    
    # decline episodes end when one of five things happens:
    # 1. cumulative drop: the case experiences a gradual drop <= cum_turn over the tolerance period (or less)
    
    
    # first find the last positive change on EDI equal to the start_incl parameter
    # this will become the new end of episodes at some point, once we clean things up
    dplyr::group_by(increase_episode_id) %>%
    dplyr::mutate(last_ch_year = max(hablar::s(ifelse(v2xca_academ_four-dplyr::lag(v2xca_academ_four, n=1)>=start_incl, year, NA))),
                  # here we just replace with NA non-episode years
                  last_ch_year = ifelse(increase_episode==0, NA, last_ch_year))
  
  # first write a small function to deal with Inf
  my.min <- function(x) ifelse(!all(is.na(x)), min(x, na.rm=T), NA)
  
  # now we look for a gradual drop equal to cum_drop over the tolerance period
  cum_drop <- list()
  # here we loop over time equal to the tolerance, looking for the difference between the last_change_year and that year on the EDI
  for (i in 1:tolerance) {
    cum_drop[[i]] <- ifelse(full.df$year == full.df$last_ch_year & dplyr::lead(full.df$country_id, n=i)==full.df$country_id, dplyr::lead(full.df$v2xca_academ_four, n=i)-full.df$v2xca_academ_four, NA)
  }
  
  # then we rename the columns and generate a dataframe we can use for our existing data
  df <- do.call(cbind, lapply(cum_drop, data.frame, stringsAsFactors=FALSE))
  names <- paste0('cum', seq(1:tolerance))
  colnames(df) <- names
  cum_drop <- df %>%
    dplyr::mutate(cum_drop = ifelse(apply(df, 1, FUN = my.min) <= cum_turn*-1, 1,NA)) %>%
    dplyr::select(cum_drop)
  
  # merge these new columns to our full.df
  full.df <- full.df %>%
    tibble::rownames_to_column('newid') %>%
    left_join(tibble::rownames_to_column(cum_drop, 'newid'), by = 'newid') %>%
    dplyr::select(-newid) %>%
    
    # now we can finally code our termination variable
    # first we group by episode
    dplyr::group_by(increase_episode_id) %>%
    dplyr::arrange(increase_episode_id, year) %>%
    # first, lets fill everything in for the episode
    dplyr::ungroup() %>%
    dplyr::mutate(increase_episode_id = ifelse(increase_episode==1, paste(country_text_id, increase_episode_start_year, increase_episode_end_year, sep = "_"), NA), 
                  increase_sum = ifelse(increase_episode!=1, NA, increase_sum), 
                  increase_episode_start_year = ifelse(increase_episode!=1, NA, increase_episode_start_year), 
                  increase_episode_end_year = ifelse(increase_episode!=1, NA, increase_episode_end_year), 
                  decline_episode_id = ifelse(decline_episode!=1, NA, decline_episode_id), 
                  decline_sum = ifelse(decline_episode!=1, NA, decline_sum), 
                  decline_sum = ifelse(decline_episode!=1, NA, decline_sum), 
                  decline_episode_start_year = ifelse(decline_episode!=1, NA, decline_episode_start_year),
                  decline_episode_end_year = ifelse(decline_episode!=1, NA, decline_episode_end_year), 
                  decline_episode_censored = ifelse(decline_episode!=1, NA, decline_episode_censored)) %>%
    
    dplyr::group_by(country_text_id) %>%
    dplyr::arrange(country_id, year) %>%
    
    # code censored/ongoing episodes for survival analysis, new dummy variable
    dplyr::mutate(increase_episode_censored = ifelse(increase_episode==1 & increase_episode_uncertain == 1, 1, NA)) %>%
    
    dplyr::filter(!is.na(origsample)) %>%
    dplyr::select(country_id, country_text_id, country_name, v2xca_academ_four, year, 
                  increase_episode, increase_episode_id, increase_sum, increase_episode_start_year, increase_episode_end_year, 
                  increase_episode_censored, 
                  decline_episode, decline_episode_id, decline_sum, decline_episode_start_year, decline_episode_end_year, 
                  decline_episode_censored, lag_v2xca_academ_four) %>%
    ungroup()
  
  
  {
    return(full.df)
    }
}

